# One variable plots ----
# Density plot: smoothed distribution of bmi
nhanes_small %>%
  ggplot(mapping = aes(x = bmi)) +
  geom_density()
## Warning: Removed 366 rows containing non-finite values (stat_density).

# Histogram of bmi, using the default binning
nhanes_small %>%
  ggplot(mapping = aes(x = bmi)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 366 rows containing non-finite values (stat_bin).

# Bar chart: row counts for each level of sex
nhanes_small %>%
  ggplot(mapping = aes(x = sex)) +
  geom_bar()

# Bar chart: row counts for each level of diabetes
nhanes_small %>%
  ggplot(mapping = aes(x = diabetes)) +
  geom_bar()

# Keep only the rows where diabetes is not missing
nhanes_tidied <- nhanes_small %>%
  filter(!is.na(diabetes))

# Save the tidied data frame with usethis, replacing any previously saved
# copy. `TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
usethis::use_data(nhanes_tidied, overwrite = TRUE)
## ✓ Setting active project to '/Users/schaarup/Desktop/LearningR'
## ✓ Saving 'nhanes_tidied' to 'data/nhanes_tidied.rda'
## • Document your data (see 'https://r-pkgs.org/data.html')
# Bar chart of diabetes using the tidied data
nhanes_tidied %>%
  ggplot(mapping = aes(x = diabetes)) +
  geom_bar()

# Plotting two variables ----
# Base plot reused by the layers below: bmi on x, tot_chol on y
bmi_chol <- nhanes_tidied %>%
  ggplot(mapping = aes(x = bmi, y = tot_chol))

# Scatter plot
bmi_chol +
  geom_point()
## Warning: Removed 1457 rows containing missing values (geom_point).

# Hexagonal binning of the same two variables
bmi_chol +
  geom_hex()
## Warning: Removed 1457 rows containing non-finite values (stat_binhex).

# Smoothed trend line (method chosen by geom_smooth())
bmi_chol +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1457 rows containing non-finite values (stat_smooth).

# Hex bins with the smoothed trend drawn on top
bmi_chol +
  geom_hex() +
  geom_smooth()
## Warning: Removed 1457 rows containing non-finite values (stat_binhex).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1457 rows containing non-finite values (stat_smooth).

# Base plot for two discrete variables: diabetes on the x-axis,
# bars filled by sex
two_discrete <- ggplot(
  data = nhanes_tidied,
  mapping = aes(x = diabetes, fill = sex)
)

# Default position: the sex groups are stacked within each bar
two_discrete +
  geom_bar()

# Dodged position: the sex groups are drawn side by side
two_discrete +
  geom_bar(position = "dodge")

# Base plot for one discrete (diabetes) and one continuous (bmi) variable
two_mixed <- ggplot(
  data = nhanes_tidied,
  mapping = aes(x = diabetes, y = bmi)
)

# Box plot of bmi per diabetes group
two_mixed +
  geom_boxplot()
## Warning: Removed 229 rows containing non-finite values (stat_boxplot).

# Jittered points per diabetes group
two_mixed +
  geom_jitter()
## Warning: Removed 229 rows containing missing values (geom_point).

# Violin plot per diabetes group
two_mixed +
  geom_violin()
## Warning: Removed 229 rows containing non-finite values (stat_ydensity).

# Exercise 11.5 ----
# 1a. Distribution of age
# `bins` is the argument that sets the number of histogram bins; here it is
# set explicitly to 30.
ggplot(nhanes_tidied, aes(x = age)) +
  geom_histogram(bins = 30)

# 1b. Distribution of age at diabetes diagnosis (30 bins)
nhanes_tidied %>%
  ggplot(mapping = aes(x = diabetes_age)) +
  geom_histogram(bins = 30)
## Warning: Removed 9229 rows containing non-finite values (stat_bin).

# 2a. Number of people who smoke now
nhanes_tidied %>%
  ggplot(mapping = aes(x = smoke_now)) +
  geom_bar()

# 2b. Number of people who are physically active
nhanes_tidied %>%
  ggplot(mapping = aes(x = phys_active)) +
  geom_bar()

# 3a. BMI in relation to systolic blood pressure (hex bins)
nhanes_tidied %>%
  ggplot(mapping = aes(x = bmi, y = bp_sys_ave)) +
  geom_hex()
## Warning: Removed 1376 rows containing non-finite values (stat_binhex).

# 3b. BMI in relation to diastolic blood pressure (hex bins)
nhanes_tidied %>%
  ggplot(mapping = aes(x = bmi, y = bp_dia_ave)) +
  geom_hex()
## Warning: Removed 1376 rows containing non-finite values (stat_binhex).

# 4. Physical activity among people with or without diabetes,
#    with the phys_active groups drawn side by side
nhanes_tidied %>%
  ggplot(mapping = aes(x = diabetes, fill = phys_active)) +
  geom_bar(position = "dodge")

# 5. Poverty levels among people with or without diabetes
nhanes_tidied %>%
  ggplot(mapping = aes(x = diabetes, y = poverty)) +
  geom_violin()
## Warning: Removed 710 rows containing non-finite values (stat_ydensity).

# Plotting three or more variables ----
# One continuous and two discrete variables:
# box plots of bp_sys_ave per sex, coloured by diabetes
ggplot(
  data = nhanes_tidied,
  mapping = aes(x = sex, y = bp_sys_ave, colour = diabetes)
) +
  geom_boxplot()
## Warning: Removed 1312 rows containing non-finite values (stat_boxplot).

# Three continuous variables: scatter plot with age mapped to point colour
ggplot(
  data = nhanes_tidied,
  mapping = aes(x = bmi, y = bp_sys_ave, colour = age)
) +
  geom_point()
## Warning: Removed 1376 rows containing missing values (geom_point).

# Two continuous variables and one discrete variable:
# scatter plot with diabetes mapped to point colour
ggplot(
  data = nhanes_tidied,
  mapping = aes(x = bmi, y = bp_sys_ave, colour = diabetes)
) +
  geom_point()
## Warning: Removed 1376 rows containing missing values (geom_point).

# Side by side: one panel per diabetes level, arranged in columns
ggplot(
  data = nhanes_tidied,
  mapping = aes(x = bmi, y = bp_sys_ave)
) +
  geom_point() +
  facet_grid(. ~ diabetes)
## Warning: Removed 1376 rows containing missing values (geom_point).

# Stacked: one panel per diabetes level, arranged in rows
ggplot(
  data = nhanes_tidied,
  mapping = aes(x = bmi, y = bp_sys_ave)
) +
  geom_point() +
  facet_grid(diabetes ~ .)
## Warning: Removed 1376 rows containing missing values (geom_point).

# Five variables: bmi and bp_sys_ave on the axes, age as point colour,
# diabetes as facet rows and sex as facet columns
ggplot(
  data = nhanes_tidied,
  mapping = aes(x = bmi, y = bp_sys_ave, colour = age)
) +
  geom_point() +
  facet_grid(diabetes ~ sex)
## Warning: Removed 1376 rows containing missing values (geom_point).
